** Clean and merge ACS household income distribution statistics 
** JHL 

*************************************
** Set up workspace
*************************************
version 14.0
clear all
set more off

cd "${path_home}"
adopath + ../programs

** log using "${path_log}/c03_acs_hh_income", text replace

*************************************
** Start work here
*************************************
timer on 1 

*************************************
** [1] Clean and append  
*************************************

** ACS 5-year estimates, 09-15 available, all counties 

foreach i in 09 10 11 12 13 14 15 {
	import delimited "${path_big_dta}/acs/hh_income/raw/ACS_`i'_5YR_S1901/ACS_`i'_5YR_S1901.csv", varnames(1) clear 
	
	* Label variables with first row of obs 
	foreach var of varlist * {
		label variable `var' "`=`var'[1]'"
		  replace `var'="" if _n==1
		  destring `var', replace
	}
	
	drop in 1 
	
	save "${path_big_dta}/acs/hh_income/dta/acs_hh_income_5yr_`i'", replace
}

clear
gen year = "" 
foreach i in 09 10 11 12 13 14 15 {
	append using "${path_big_dta}/acs/hh_income/dta/acs_hh_income_5yr_`i'", force
	replace year = "`i'" if year == ""
}
destring year, replace
replace year = year + 2000 

rename geoid2 fips 

save "${path_big_dta}/acs/hh_income/dta/acs_hh_income_5yr_0915", replace 

*************************************
** [2] Create fips-year panel of income distribution variables from ACS
*************************************

use "${path_big_dta}/acs/hh_income/dta/acs_hh_income_5yr_0915", clear 

** Note: vc12 refers to median income in 2009, but vc13 refers to median income from 2010-2015
keep year fips hc01_est_vc02 hc01_est_vc03 hc01_est_vc04 hc01_est_vc05 hc01_est_vc06 hc01_est_vc07 hc01_est_vc08 hc01_est_vc09 hc01_est_vc10 hc01_est_vc11 hc01_est_vc12 hc01_est_vc13

* Convert from percentage to fraction 
foreach v of varlist hc01_est_vc02 - hc01_est_vc11 {
	replace `v' = `v' / 100 
}

** Generate cumulative distribution 
foreach i in 02 03 04 05 06 07 08 09 10 11 {
	egen hc01_est_vc`i'_cumul_5yr = rowtotal(hc01_est_vc02 - hc01_est_vc`i') if hc01_est_vc02!=. 
}

foreach v of varlist hc01_est_vc02 hc01_est_vc03 hc01_est_vc04 hc01_est_vc05 hc01_est_vc06 hc01_est_vc07 hc01_est_vc08 hc01_est_vc09 hc01_est_vc10 hc01_est_vc11 hc01_est_vc12 hc01_est_vc13 {
	rename `v' `v'_5yr 
}

** Use VZ state MW data 
gen fips_state_code = int(fips/1000)
gen fips_county_code = fips - fips_state_code*1000
cap noi gen state = fips_state_code 
gen statefips=fips_state_code

** Non-matches: not one of the 50 states + DC (state was 72/78)
merge m:1 statefips year using "${path_big_dta}/mw/VZ/VZ_state_stata/VZ_state_annual.dta", keep(match) nogen 

** Annual wage earned by MW worker assuming 40 hours and 52 weeks / median income 
gen mw_over_med_i_county_5yr = (mean_mw*40*52) / hc01_est_vc12_5yr 
	replace mw_over_med_i_county_5yr = (mean_mw*40*52) / hc01_est_vc13_5yr if year >= 2010 

gen frac_b_25K_5yr = hc01_est_vc04_cumul_5yr

keep year fips fips_state_code fips_county_code frac_b_25K_5yr mw_over_med_i_county_5yr 
rename (mw_over_med_i_county_5yr) (kaitz_med_c_5yr)

save "${path_big_dta}/acs/hh_income/dta/acs_hh_income_dist_0915", replace 

*************************************
** [3] Create fips panel of income distribution variables from ACS and quantiles, 07-15
*************************************
	
use "${path_big_dta}/acs/hh_income/dta/acs_hh_income_dist_0915", clear 

** 5 yr variables, by year 
foreach y of numlist 2009/2015 {
	foreach v of varlist frac_b_25K_5yr kaitz_med_c_5yr {
		gen temp = `v' if year == `y'
		bys fips: egen `v'_y`y'_c = min(temp) 
		drop temp
	}	
}

** Means across period 
foreach v of varlist frac_b_25K_5yr kaitz_med_c_5yr {
	egen m_`v'=mean(`v'), by(fips)
}

duplicates drop fips, force

** Create quantiles for both 3 and 5 yr variables 
foreach q of numlist 2 4 {
	foreach y of numlist 2009/2015 {
		xtile frac_b_25K_5yr_y`y'_q`q'_c = frac_b_25K_5yr_y`y'_c, nq(`q')
		xtile kaitz_med_c_5yr_y`y'_q`q'_c = kaitz_med_c_5yr_y`y'_c, nq(`q')
	}
}


drop year kaitz_med_c_5yr frac_b_25K_5yr
sort fips 
save "${path_big_dta}/acs/hh_income/dta/acs_hh_income_dist_q24_0915", replace 

*************************************
** Close workspace
*************************************
timer off 1
timer list 1
** log close 


